
import requests
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

import urllib.request
from lxml import etree



def crow(i, out_file='douban.txt', img_dir='/Users/longzhi/Desktop/L002/top250/'):
    """Scrape page *i* (0-based) of Douban's Top-250 movie list.

    For each movie on the page, append its rank, title and credits line to
    *out_file* and download its poster image into *img_dir*.

    Parameters:
        i: page index; page i covers ranks i*25+1 .. i*25+25.
        out_file: path of the text file results are appended to.
        img_dir: directory prefix (including trailing separator) where
            poster images are saved.
    """
    # Build the URL for page i (each page lists 25 movies).
    url = 'https://movie.douban.com/top250?start=' + str(i * 25)
    # NOTE(review): douban.com is known to reject urllib's default
    # User-Agent — send a browser-like one so the request succeeds.
    request = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    page = urllib.request.urlopen(request).read().decode('utf-8')
    # Parse the raw HTML so it can be queried with XPath.
    html = etree.HTML(page)

    # Each <li> under the ordered list is one movie entry.
    datas = html.xpath('//*[@id="content"]/div/div[1]/ol/li')
    # Open the output file once per page instead of once per movie.
    with open(out_file, 'a', encoding='utf-8') as f:
        for a, data in enumerate(datas):
            titles = data.xpath('div/div[2]/div[1]/a/span[1]/text()')
            imgs = data.xpath('div/div[1]/a/img/@src')
            edis = data.xpath('div/div[2]/div[2]/p[1]/text()')
            # Skip malformed entries instead of crashing with IndexError
            # on titles[0] / imgs[0] / edis[0].
            if not (titles and imgs and edis):
                continue
            rank = i * 25 + 1 + a
            print('No:' + str(rank))
            # path + file name + file extension for the poster image
            picname = img_dir + 'No:' + str(rank) + titles[0] + '.jpg'
            f.write('No:' + str(rank) + '\n')
            f.write(titles[0] + '\n')
            f.write(edis[0] + '\n')
            # Download the poster image to the local path picname.
            urllib.request.urlretrieve(imgs[0], filename=picname)
if __name__ == "__main__":
    # Crawl all 10 pages (10 x 25 = 250 movies) only when run as a
    # script, so importing this module does not trigger the crawl.
    for page in range(10):
        crow(page)

from bs4 import BeautifulSoup

